# -*- mode: python; coding: utf-8; -*-
# vim: set syntax=python fileencoding=utf-8

"""
www.eldiplo.org
"""

__license__ = "GPL v3"
__copyright__ = "2023, Tomás Di Domenico <tdido at tdido.eu>"

import re

from calibre.ptempfile import PersistentTemporaryFile
from calibre.web.feeds.news import BasicNewsRecipe
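
# To try this recipe from the command line, something like the following
# should work (assuming the file is saved as eldiplo.recipe; the credentials
# below are placeholders for a valid www.eldiplo.org subscription):
#
#   ebook-convert eldiplo.recipe .epub --test -vv \
#       --username you@example.com --password secret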


class ElDiplo2023(BasicNewsRecipe):
    title = "Le Monde Diplomatique - Cono Sur"
    __author__ = "Darko Miletic and Tomás Di Domenico"
    description = "Publicación de Le Monde Diplomatique para el Cono Sur."
    publisher = "Capital Intelectual"
    category = "news, politics, Argentina, Uruguay, Paraguay, South America, World"
    oldest_article = 31
    no_stylesheets = True
    encoding = "utf8"
    use_embedded_content = False
    language = "es_AR"
    remove_empty_feeds = True
    publication_type = "magazine"
    auto_cleanup = True
    delay = 1
    simultaneous_downloads = 1
    timeout = 8
    needs_subscription = True
    ignore_duplicate_articles = {"url"}
    articles_are_obfuscated = True
    temp_files = []
    fetch_retries = 10
    handle_gzip = True
    compress_news_images = True
    scale_news_images_to_device = True
    masthead_url = (
        "https://www.eldiplo.org/wp-content/themes/_polenta_/assets/diplo.png"
    )
    # the landing page hosts both the login form and the "Sumario" link to
    # the current issue
    INDEX = "https://www.eldiplo.org/"

    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        br.open(self.INDEX)
        if self.username is not None and self.password is not None:
            # the site uses a standard WordPress login form, with the usual
            # 'log' and 'pwd' field names
            br.select_form(id="loginform")
            br["log"] = self.username
            br["pwd"] = self.password
            br.submit()
        return br

    def get_cover_url(self):
        soup_index = self.index_to_soup(self.INDEX)
        tag_sumario = soup_index.find("span", string="Sumario")
        if tag_sumario is None:
            return None
        url_sumario = "https://www.eldiplo.org" + tag_sumario.parent["href"]

        soup = self.index_to_soup(url_sumario)

        # the cover is the image inside the issue's header container
        container = soup.find("div", class_="px-16")
        url = container.find("img")["src"]

        # honour a cover_url set elsewhere, otherwise use the scraped one
        return getattr(self, "cover_url", url)

    def _process_article(self, article):
        url = article.find("a", href=True, attrs={"class": "title"})["href"]
        title = self.tag_to_string(article).replace("Editorial", "Editorial: ")
        try:
            # article titles look like "Título, por Autor"; move the author
            # part into the description
            title, authors = title.split(", por", 1)
            authors = f"por {authors.strip()}"
        except ValueError:
            authors = ""
        self.log("title: ", title, " url: ", url)
        return {"title": title, "url": url, "description": authors, "date": ""}

    def preprocess_html(self, soup):
        # strip the anchor tags of internal references, keeping their inner
        # text; it would be nice to eventually make the internal links work
        for link in soup.find_all(name="a", attrs={"href": re.compile(r"#")}):
            link.unwrap()

        return soup

    def parse_index(self):
        soup_index = self.index_to_soup(self.INDEX)

        tag_sumario = soup_index.find("span", string="Sumario")

        if tag_sumario is None:
            # a missing "Sumario" link means the page layout has changed;
            # give the user a clear message instead of failing cryptically
            self.abort_recipe_processing("could not find the 'Sumario' link")

        url_sumario = "https://www.eldiplo.org" + tag_sumario.parent["href"]
        self.log(url_sumario)

        soup_sumario = self.index_to_soup(url_sumario)

        feeds = []
        articles = []
        dossiers = []

        sumario = soup_sumario.find("div", class_="sumario")

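        # each direct child of the summary div is either a standalone article
        # or a "dossier" block grouping several articles under its own title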
        for section in sumario.find_all("div", recursive=False):
            classes = section.attrs["class"]

            if "dossier" in classes:
                dtitle = self.tag_to_string(section.find("h3"))
                darticles = []
                for article in section.find_all("div", recursive=False):
                    darticles.append(self._process_article(article))
                dossiers.append((dtitle, darticles))
            else:
                articles.append(self._process_article(section))
        feeds.append(("Artículos", articles))
        feeds += dossiers

        return feeds

    def get_obfuscated_article(self, url):
        # fetch the article ourselves, retrying on failure, and hand calibre
        # the path to a local copy; this works around flaky downloads
        result = None
        for _ in range(self.fetch_retries):
            try:
                response = self.browser.open(url, timeout=self.timeout)
                html = response.read()
            except Exception:
                self.log.info("Retrying download...")
                continue
            tfile = PersistentTemporaryFile("_fa.html")
            tfile.write(html)
            tfile.close()
            self.temp_files.append(tfile)
            result = tfile.name
            break
        return result
